Loading the library
library(Seurat)
Setting the working directory
# Set the working directory so that the relative paths passed to readRDS()
# and read.csv() further down resolve against this folder.
# NOTE(review): this is an absolute, user-specific path, so the script only
# runs unchanged on the author's machine.
setwd("C:/Users/savill/OneDrive/Documents/PhD Jesse/R-Objects")
Loading the data
# load my data
# Read the saved object from df96v2.rds (path is relative to the working
# directory), then auto-print it to check what was loaded.
Gastruloid_96h_merged_afterQC <- readRDS("./df96v2.rds")
Gastruloid_96h_merged_afterQC
An object of class Seurat
37316 features across 2056 samples within 2 assays
Active assay: RNA (20235 features, 2000 variable features)
1 other assay present: SCT
2 dimensional reductions calculated: pca, umap
# Switch the default assay to "SCT" (the printed summary shows "RNA" as the
# active assay when the object is loaded).
DefaultAssay(Gastruloid_96h_merged_afterQC) <- "SCT"
# Set the identities from "predicted.id".
# NOTE(review): presumably this refers to the metadata column of that name
# (it appears in colnames(metadata)) - confirm.
Gastruloid_96h_merged_afterQC <- SetIdent(Gastruloid_96h_merged_afterQC, value = "predicted.id")
Getting the metadata and taking a peek:
# Take a copy of the object's meta.data slot; the edits below are made on
# this copy and do not change the Seurat object itself.
metadata <- Gastruloid_96h_merged_afterQC@meta.data
# Show the first rows as a quick sanity check.
head(metadata)
Get all the columns so we can delete the old morphology info:
# List every metadata column name so the old morphology columns can be
# identified for removal.
colnames(metadata)
[1] "orig.ident" "nCount_RNA" "nFeature_RNA" "Sample.barcode" "Morphotype" "percent.mt"
[7] "RNA_snn_res.0.3" "seurat_clusters" "S.Score" "G2M.Score" "Phase" "old.ident"
[13] "nCount_SCT" "nFeature_SCT" "predicted.id" "prediction.score.37" "prediction.score.19" "prediction.score.8"
[19] "prediction.score.3" "prediction.score.17" "prediction.score.13" "prediction.score.12" "prediction.score.27" "prediction.score.38"
[25] "prediction.score.36" "prediction.score.7" "prediction.score.2" "prediction.score.11" "prediction.score.20" "prediction.score.16"
[31] "prediction.score.1" "prediction.score.6" "prediction.score.26" "prediction.score.18" "prediction.score.9" "prediction.score.23"
[37] "prediction.score.21" "prediction.score.32" "prediction.score.30" "prediction.score.35" "prediction.score.22" "prediction.score.31"
[43] "prediction.score.24" "prediction.score.39" "prediction.score.33" "prediction.score.34" "prediction.score.max" "Area"
[49] "Perim" "Major" "Minor" "Feret" "MinFeret" "AR"
[55] "Round" "Solidity" "BRA_Area" "BRA_Mean" "BRA_Major" "BRA_Minor"
[61] "BRA_Circ" "BRA_AR" "BRA_Round" "BRA_Solidity" "BRA_Fraction" "percent.ribo"
[67] "RNA_snn_res.0.5" "RNA_snn_res.0.4" "sub_cluster" "RNA_snn_res.0.6" "RNA_snn_res.0.7" "RNA_snn_res.0.8"
[73] "glycolysis1" "oxphos1" "BRA_Fraction_bin" "glycolysis_score" "glycolysis_score_quantile" "oxphos_score_quantile"
[79] "oxlbk_score_quantile"
# Build a copy of the metadata without the previous morphology measurements,
# so the new measurements can be attached afterwards.
old_morpho_features <- c(
  "Area", "Perim", "Major", "Minor", "Feret", "MinFeret", "AR", "Round",
  "Solidity", "BRA_Area", "BRA_Mean", "BRA_Major", "BRA_Minor", "BRA_Circ",
  "BRA_AR", "BRA_Round", "BRA_Solidity", "BRA_Fraction"
)
all_features <- colnames(metadata)
# Keep every column whose name is not one of the old morphology features.
metadata_old_removed <- metadata[
  , !(all_features %in% old_morpho_features),
  drop = FALSE
]
# Disabled: copy the row names into a regular column.
# index_name = "row_names"
# metadata_old_removed[, index_name] <- rownames(metadata_old_removed)
metadata_old_removed
NA
Now we can move on to loading the new features and adding them to the metadata:
# Read the new morphology features (path is relative to the working
# directory).
# NOTE: read.csv() defaults to check.names = TRUE, so headers that are not
# syntactically valid R names are rewritten by make.names() (for example a
# "#" becomes "."). Column names used later must match the rewritten form.
my_data_96h = read.csv("Morphodata_All_Features_Sequenced_096h.csv")
# Check the table size (rows x columns).
dim(my_data_96h)
[1] 24 537
# Auto-print the freshly loaded table for a visual check.
my_data_96h
Adding the identifier so we can merge the dataframes:
# The barcode identifier is assigned purely by row position, so it is
# CRITICAL that the rows are in the same order as in the spreadsheet.
# This means A1-A12 INSTEAD of A10, A11, A12, A1, A2 (which is the default
# order in python and pandas).
barcoder_identifier <- "bar"
# Number the rows from the data itself instead of a hard-coded 1:24: with a
# fixed 24, a longer table would silently recycle the labels (or fail when
# the row count is not a multiple of 24).
my_data_96h[barcoder_identifier] <- paste0("Bar", seq_len(nrow(my_data_96h)))
my_data_96h
Here we exclude the columns we don't want to add, on the assumption that every other column in the .csv file should be added. Alternatively, we could manually specify which columns (features) to add to the Seurat object. Either way, the sample barcode has to be added first so that the data can be merged with the existing metadata.
# Copy every morphology feature from the CSV table onto the per-cell
# metadata, looking up each cell's row through its sample barcode.
all_columns <- colnames(my_data_96h)

# Bookkeeping columns that must not be added as features.
# read.csv() (default check.names = TRUE) passes headers through
# make.names(), so a header such as "Multi_BC_#" is stored as "Multi_BC_.".
# Exclude the sanitized spellings as well; otherwise that column never
# matches the list and is added to the metadata as a feature.
raw_columns_we_dont_need <- c(
  "Sample", "Multi_BC", "Multi_BC_#", "Plate", "Morphotype_120h", "Axes",
  "Morpho_120h", "Morphotype_96h", barcoder_identifier
)
columns_we_dont_need <- unique(
  c(raw_columns_we_dont_need, make.names(raw_columns_we_dont_need))
)
columns_we_need <- all_columns[!(all_columns %in% columns_we_dont_need)]

# The barcode lookup is the same for every feature, so compute it once.
# Cells whose Sample.barcode has no match in the CSV get NA for all features.
row_for_cell <- match(
  metadata_old_removed$Sample.barcode,
  my_data_96h[[barcoder_identifier]]
)
for (feature_name in columns_we_need) {
  metadata_old_removed[[feature_name]] <-
    my_data_96h[[feature_name]][row_for_cell]
}
metadata_old_removed